#!/bin/bash

# Work out the directory this script lives in and put it at the front of PATH,
# so helper programs installed next to it are found first.
# The current directory is appended to PATH as a last resort.
mypath="$(type -p "$0")"
# `type -p` prints nothing when $0 is neither an executable path nor found on
# PATH (for instance when started as `bash mxrun`); fall back to $0 itself so
# the prefix is not computed from an empty string.
test -n "$mypath" || mypath="$0"
case "$mypath" in
  /*) : ;;                          # already absolute
  *)  mypath="$PWD/$mypath" ;;      # make a relative path absolute
esac
myprefix="$(dirname "$mypath")"
PATH=$myprefix:$PATH:.

# Command word put in front of each ssh command line in the spawn loop:
# ":" (the default) discards the line, "echo" (selected by -v) prints it.
verbose=:

# Print "ERROR:" followed by all arguments (joined by spaces) on stderr,
# then terminate the script with exit status 1.
error() {
  local message="$*"
  printf 'ERROR:%s\n' "$message" >&2
  exit 1
}

# Print the command-line synopsis on stdout and terminate the script with
# exit status 1.
usage() {
  printf '%s\n' \
    "Usage: mxrun [ option ]  [-np <n>] prog args" \
    "   -machinefile <file>   Specifies a machine file" \
    "   --mx-wait <n>   Wait <n> seconds between each spawning step" \
    "   --mx-ssh <ssh-opts>   Add <ssh-opt> to the options on the ssh command line"
  exit 1
}

# Defaults; each can be overridden by the options parsed below.
np=1                          # number of processes to start (-np)
machfile=$HOME/.mx/machlist   # machine file (-machinefile)
mxmon="$(type -p mxmon)"      # placed in front of the program on the remote
                              # command line; empty if not found on PATH
mxenv=                        # accumulated VAR=value words for the remote env
# Clear the optional settings explicitly: otherwise a same-named variable
# inherited from the caller's environment would silently switch them on.
wait=
dflag=
label_nic=
ssh_opt=

# Consume leading options and VAR=value words; stop at the first word that is
# neither, which is taken to be the program to run.
while test "$#" -gt 0 ; do
  # Options that take a value must actually be followed by one.
  case "$1" in
    -machinefile|-np|--mx-wait|--mx-ssh)
      test "$#" -ge 2 || error "option $1 requires an argument";;
  esac
  case "$1" in
    -machinefile)
      machfile="$2"; shift;;
    -np)
      np="$2"; shift;;
    -v)
      verbose=echo;;
    --mx-wait)
      wait="$2"; shift;;
    --mx-dflag)
      dflag=1;;
    --mx-label-nic)
      label_nic=1;;
    --mx-ssh)
      ssh_opt="$2"; shift;;
    --mx-nomon)
      mxmon=;;
    -*help*)
      usage;;
    -*)
      error "unknown option: $1";;
    *=*)
      mxenv="$mxenv $1";;
    *)
      break;;
  esac
  shift
done

# The process count is used in integer comparisons later on; reject anything
# that is not a plain non-negative integer here, with a clear message.
case "$np" in
  ''|*[!0-9]*) error "invalid process count for -np: $np";;
esac

# The first remaining word is the program to launch; without one there is
# nothing to run, so show the synopsis instead of a confusing
# " not executable" error.
test "$#" -gt 0 || usage
prog="$1"
shift

# If the program is not executable as given, look it up on PATH.
if ! test -x "$prog" ; then
  prog2="$(type -p "$prog")"
  test -n "$prog2" || error "$prog not executable"
  prog="$prog2"
fi

# Read the machine list, failing early when the file cannot be read.
test -r "$machfile" || error "cannot read machine file: $machfile"
machlist="$(cat "$machfile")"

# Everything left on the command line is the program's argument list,
# flattened into a single space-separated string.
args="$*"

# From here on the positional parameters hold the machines still to be used.
# $machlist is left unquoted on purpose so that it splits into one word per
# entry.
set -- $machlist
# An empty list would make the spawn loop call ssh with an empty host name.
test "$#" -gt 0 || error "no machines listed in $machfile"


# Signal handler: send the default kill signal to every PID recorded in
# $pids, remove this run's FIFO and leave with exit status 1.
handle_signal() {
  local child
  # $pids is a space-separated list; it is left unquoted so that it splits
  # into one PID per iteration. Failures from kill are ignored.
  for child in $pids; do
    kill "$child" 2>/dev/null
  done
  rm -f "/tmp/mxrun.$$"
  exit 1
}

# Run the cleanup handler on hangup, interrupt and terminate (signals 1, 2, 15).
trap handle_signal HUP INT TERM

# Per-run FIFO that collects the output of every spawned command.
# Stop right away if it cannot be created: carrying on would make the reader
# and the writers below operate on a missing path or on whatever file already
# exists under that name.
mkfifo /tmp/mxrun.$$ || error "cannot create FIFO /tmp/mxrun.$$"

# A background cat copies everything written to the FIFO to our stdout.
# It finishes once no writer holds the FIFO open, which is what the final
# `wait $catpid` of the script relies on to detect termination.
cat /tmp/mxrun.$$ &
catpid=$!
# Hold a write end open on fd 4 so cat does not see end-of-file before the
# spawned commands have opened the FIFO themselves.
exec 4>/tmp/mxrun.$$

# Start $np copies of the program over ssh, walking through the machine list
# (held in the positional parameters) and wrapping around when it runs out.

# PIDs of the spawned ssh commands, newest first; read by the signal handler.
# Start from an empty list so a value inherited from the environment can
# never end up being passed to kill.
pids=
i=0       # index of the process being started (passed as MXSMPI_RANK)
lrank=0   # number of times the machine list wrapped (passed as MXSMPI_EID)
# A machine entry is either "host" or "host:<digits>".
nic_re='^(.*):[0-9]*$'

while test "$i" -lt "$np" ; do
  if test "$#" -eq 0 ; then
    # Machine list used up: start again from its first entry.
    set -- $machlist
    lrank=$((lrank + 1))
    if test "$#" -eq 0 ; then
      # Nothing to wrap around to; without this guard ssh would be invoked
      # with an empty host name for every remaining process.
      rm -f /tmp/mxrun.$$
      error "machine list is empty: $machfile"
    fi
  fi
  nic="$1"
  # Split the entry into the ssh host ($m) and the part after the last colon
  # ($nic_index); an entry without a ":<digits>" suffix gets ":0" appended.
  if [[ "$nic" =~ $nic_re ]] ; then
    m=${BASH_REMATCH[1]}
  else
    m=$nic
    nic="$nic:0"
  fi
  nic_index=${nic##*:}
  # Value passed as MXSMPI_LABEL: the machine entry with --mx-label-nic,
  # otherwise the process index.
  if test -n "$label_nic" ; then
    label=$nic
  else
    label=$i
  fi
  if test -n "$dflag" && test "$i" -gt 0 ; then
    # --mx-dflag: every process after the first starts one second later and
    # gets "-d <entry remembered from the first process>" appended.
    sleep 1
    dflagopt="-d $mach0"
  else
    mach0=$nic
    dflagopt=
  fi
  # Two passes over the same command line: the first prefixes it with
  # $verbose (":" discards it, "echo" writes it into the FIFO), the second
  # has an empty prefix and really runs ssh. $ssh_opt is left unquoted so it
  # can carry several options.
  for c in $verbose "" ; do
    $c ssh $ssh_opt -n "$m" "cd $PWD && exec env MXSMPI_LABEL=$label MXSMPI_NIC=$nic_index MXSMPI_RANK=$i MXSMPI_EID=$lrank MXSMPI_NP=$np MXSMPI_FILE=$machfile $mxenv $mxmon $prog $args $dflagopt" > /tmp/mxrun.$$ &
  done
  # $! is the job started by the last pass, i.e. the real ssh.
  pids="$! $pids"
  i=$((i + 1))
  shift
  # --mx-wait: pause between spawning steps.
  if test -n "$wait" ; then
    sleep "$wait"
  fi
  # With no --mx-ssh options given, every ssh after the first runs with -x.
  test -z "$ssh_opt" && ssh_opt=-x
done

# Every spawned command has its own stdout on the FIFO by now: remove the
# name and close our own write end so the background cat can reach
# end-of-file once those commands are done.
rm -f /tmp/mxrun.$$
exec 4>&-

# Block until cat has finished; its status becomes the script's exit status.
wait $catpid
